NYT mask use

Github source for data https://github.com/nytimes/covid-19-data/tree/master/mask-use

Getting the data

#Source for data 
url <- "https://github.com/nytimes/covid-19-data/raw/master/mask-use/mask-use-by-county.csv"

nyt_mask_survey <- read_csv(here::here("data", "nyt_mask_survey.csv"))

nyt_mask_survey <- nyt_mask_survey %>%
  clean_names() %>% 
  mutate(
    mostly_yes= frequently+always,
    mostly_no = never+rarely,
    delta = mostly_yes-mostly_no
  )

glimpse(nyt_mask_survey)
## Rows: 3,142
## Columns: 9
## $ countyfp   <chr> "01001", "01003", "01005", "01007", "01009", "01011"…
## $ never      <dbl> 0.053, 0.083, 0.067, 0.020, 0.053, 0.031, 0.102, 0.1…
## $ rarely     <dbl> 0.074, 0.059, 0.121, 0.034, 0.114, 0.040, 0.053, 0.1…
## $ sometimes  <dbl> 0.134, 0.098, 0.120, 0.096, 0.180, 0.144, 0.257, 0.1…
## $ frequently <dbl> 0.295, 0.323, 0.201, 0.278, 0.194, 0.286, 0.137, 0.1…
## $ always     <dbl> 0.444, 0.436, 0.491, 0.572, 0.459, 0.500, 0.451, 0.4…
## $ mostly_yes <dbl> 0.739, 0.759, 0.692, 0.850, 0.653, 0.786, 0.588, 0.6…
## $ mostly_no  <dbl> 0.127, 0.142, 0.188, 0.054, 0.167, 0.071, 0.155, 0.2…
## $ delta      <dbl> 0.612, 0.617, 0.504, 0.796, 0.486, 0.715, 0.433, 0.3…

Choropleth map

The FIPS code is a federal code that numbers states and territories of the US. It extends to the county level with an additional four digits, so every county in the US has a unique six-digit identifier, where the first two digits represent the state.

We will be using Kieran Healy’s socviz package which among other things contains county_map and county_data

# America’s choropleths; use county_map that has all polygons 
# and county data with demographics/election data from socviz datafile
# The id field is the FIPS code for the county
county_map %>% 
  sample_n(5)
##      long     lat  order  hole piece            group    id
## 1   57111 -244795 110862 FALSE     1 0500000US31103.1 31103
## 2 1423522 -669990  66797 FALSE     1 0500000US21129.1 21129
## 3 1259604 -680561  63644 FALSE     1 0500000US21029.1 21029
## 4  275597 -267007 110332 FALSE     1 0500000US31051.1 31051
## 5   27728 -137268 148481 FALSE     1 0500000US46085.1 46085
county_data %>%
    sample_n(5)
##      id           name state census_region      pop_dens   pop_dens4
## 1 46115   Spink County    SD       Midwest [    0,   10) [  0,   17)
## 2 55085  Oneida County    WI       Midwest [   10,   50) [ 17,   45)
## 3 21235 Whitley County    KY         South [   50,  100) [ 45,  118)
## 4 05029  Conway County    AR         South [   10,   50) [ 17,   45)
## 5 26137  Otsego County    MI       Midwest [   10,   50) [ 45,  118)
##     pop_dens6   pct_black   pop female white black travel_time land_area
## 1 [  0,    9) [ 0.0, 2.0)  6598   49.6  96.7   0.7        15.8      1504
## 2 [ 25,   45) [ 0.0, 2.0) 35563   50.1  96.6   0.5        19.2      1113
## 3 [ 45,   82) [ 0.0, 2.0) 35503   51.0  97.1   0.8        19.7       438
## 4 [ 25,   45) [10.0,15.0) 21083   51.0  85.1  11.4        22.2       552
## 5 [ 45,   82) [ 0.0, 2.0) 24158   50.8  96.4   0.5        20.5       515
##   hh_income su_gun4 su_gun6  fips votes_dem_2016 votes_gop_2016
## 1     48911 [ 0, 5) [ 0, 4) 46115            919           1854
## 2     45759 [ 8,11) [ 8,10) 55085           8103          11677
## 3     29769 [11,54] [12,54] 21235           2067          11312
## 4     35225 [11,54] [10,12)  5029           2655           4844
## 5     47584 [ 8,11) [ 8,10) 26137           3556           8266
##   total_votes_2016 per_dem_2016 per_gop_2016 diff_2016 per_dem_2012
## 1             2951        0.311        0.628       935        0.427
## 2            20837        0.389        0.560      3574        0.484
## 3            13765        0.150        0.822      9245        0.205
## 4             7837        0.339        0.618      2189        0.389
## 5            12529        0.284        0.660      4710        0.396
##   per_gop_2012 diff_2012 winner partywinner16 winner12 partywinner12
## 1        0.549       370  Trump    Republican   Romney    Republican
## 2        0.505       466  Trump    Republican   Romney    Republican
## 3        0.783      7549  Trump    Republican   Romney    Republican
## 4        0.584      1508  Trump    Republican   Romney    Republican
## 5        0.592      2327  Trump    Republican   Romney    Republican
##   flipped
## 1      No
## 2      No
## 3      No
## 4      No
## 5      No
glimpse(county_data)
## Rows: 3,195
## Columns: 32
## $ id               <chr> "0", "01000", "01001", "01003", "01005", "0100…
## $ name             <chr> NA, "1", "Autauga County", "Baldwin County", "…
## $ state            <fct> NA, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL…
## $ census_region    <fct> NA, South, South, South, South, South, South, …
## $ pop_dens         <fct> "[   50,  100)", "[   50,  100)", "[   50,  10…
## $ pop_dens4        <fct> "[ 45,  118)", "[ 45,  118)", "[ 45,  118)", "…
## $ pop_dens6        <fct> "[ 82,  215)", "[ 82,  215)", "[ 82,  215)", "…
## $ pct_black        <fct> "[10.0,15.0)", "[25.0,50.0)", "[15.0,25.0)", "…
## $ pop              <int> 318857056, 4849377, 55395, 200111, 26887, 2250…
## $ female           <dbl> 50.8, 51.5, 51.5, 51.2, 46.5, 46.0, 50.6, 45.2…
## $ white            <dbl> 77.7, 69.8, 78.1, 87.3, 50.2, 76.3, 96.0, 27.2…
## $ black            <dbl> 13.2, 26.6, 18.4, 9.5, 47.6, 22.1, 1.8, 69.9, …
## $ travel_time      <dbl> 25.5, 24.2, 26.2, 25.9, 24.6, 27.6, 33.9, 26.9…
## $ land_area        <dbl> 3531905, 50645, 594, 1590, 885, 623, 645, 623,…
## $ hh_income        <int> 53046, 43253, 53682, 50221, 32911, 36447, 4414…
## $ su_gun4          <fct> NA, NA, "[11,54]", "[11,54]", "[ 5, 8)", "[11,…
## $ su_gun6          <fct> NA, NA, "[10,12)", "[10,12)", "[ 7, 8)", "[10,…
## $ fips             <dbl> 0, 1000, 1001, 1003, 1005, 1007, 1009, 1011, 1…
## $ votes_dem_2016   <int> NA, NA, 5908, 18409, 4848, 1874, 2150, 3530, 3…
## $ votes_gop_2016   <int> NA, NA, 18110, 72780, 5431, 6733, 22808, 1139,…
## $ total_votes_2016 <int> NA, NA, 24661, 94090, 10390, 8748, 25384, 4701…
## $ per_dem_2016     <dbl> NA, NA, 0.2396, 0.1957, 0.4666, 0.2142, 0.0847…
## $ per_gop_2016     <dbl> NA, NA, 0.734, 0.774, 0.523, 0.770, 0.899, 0.2…
## $ diff_2016        <int> NA, NA, 12202, 54371, 583, 4859, 20658, 2391, …
## $ per_dem_2012     <dbl> NA, NA, 0.266, 0.216, 0.513, 0.262, 0.123, 0.7…
## $ per_gop_2012     <dbl> NA, NA, 0.726, 0.774, 0.483, 0.731, 0.865, 0.2…
## $ diff_2012        <int> NA, NA, 11012, 47443, 334, 3931, 17780, 2808, …
## $ winner           <chr> NA, NA, "Trump", "Trump", "Trump", "Trump", "T…
## $ partywinner16    <chr> NA, NA, "Republican", "Republican", "Republica…
## $ winner12         <chr> NA, NA, "Romney", "Romney", "Obama", "Romney",…
## $ partywinner12    <chr> NA, NA, "Republican", "Republican", "Democrat"…
## $ flipped          <chr> NA, NA, "No", "No", "Yes", "No", "No", "No", "…
# we have data on 3195 FIPS....

glimpse(county_map)
## Rows: 191,382
## Columns: 7
## $ long  <dbl> 1225889, 1235324, 1244873, 1244129, 1272010, 1276797, 127…
## $ lat   <dbl> -1275020, -1274008, -1272331, -1267515, -1262889, -129551…
## $ order <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
## $ hole  <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F…
## $ piece <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ group <fct> 0500000US01001.1, 0500000US01001.1, 0500000US01001.1, 050…
## $ id    <chr> "01001", "01001", "01001", "01001", "01001", "01001", "01…
# ... but to create a map, we translate these 3195 counties to 191,382 polygons!

Joing the files

We have three files

  1. nyt_mask_survey, our NYT survey data,
  2. county_map that has all polygons that define a county
  3. county_data with demographics/election data.
county_full <- left_join(county_map, county_data, by = "id")

county_masks_full <- left_join(county_full, nyt_mask_survey, 
                                by = c("id"="countyfp"))

Building our choropleth plot

p <- ggplot(data = county_masks_full,
            mapping = aes(x = long, y = lat,
                          fill = delta, 
                          group = group))

p1 <- p + 
  geom_polygon(color = "gray90", size = 0.05) + 
  coord_equal()



p2 <- p1 + 
  scale_fill_gradient(low = '#ffffcc', high= '#006837')

p3 <- p1 + 
  scale_fill_gradient2()

# get different colours from https://colorbrewer2.org/
# the one shown here is https://colorbrewer2.org/#type=diverging&scheme=BrBG&n=6
p4 <- p1 + 
  scale_fill_gradientn(colours = c('#8c510a','#d8b365','#f6e8c3','#c7eae5','#5ab4ac','#01665e'))

p1

p2

p3

p4

p4 + labs(fill = "Mask acceptance, (Mostly Yes - Mostly No)", 
          caption = "“Estimates from The New York Times, based on roughly 250,000 interviews \nconducted by Dynata from July 2 to July 14, 2020”") +
  guides(fill = guide_legend(nrow = 1)) + 
  theme_map() + 
  theme(legend.position = "bottom")

Checking for relationships

Does mask use acceptance have any relation with some demographics? Let us explor the relationship between country household income, population, and % who voted republican in 2016

county_masks_full %>% 
  select(hh_income, pop, per_gop_2016, delta) %>% 
  GGally::ggpairs()+
  theme_minimal()